
/*

This program creates all of the tables, figures, and numbers reported in the Online Appendix of:

  - Sumit Agarwal, Shin-fen Cheng, Jussi Keppo, Long Wang, and Yang Yang. March 2025. "Information Provision and Search Frictions: Evidence from the Taxi Industry in Singapore" 

Key variables: 

	treat: equal to 1 for trips involving passenger pickups from any of the four terminals at Changi Airport, and 0 otherwise;
	after: equal to 1 for trips in the post-treatment period (after December 24, 2009), and 0 for trips in the pre-treatment period, which spans January 1, 2009, to December 24, 2009;
    treat_after: an interaction of treat and after;
	deadheading_speed: defined as deadheading distance divided by deadheading time;
	deadheading_time: time difference between the previous drop-off time and current pick-up time;
	trip_speed: defined as trip distance divided by trip time;
	trip_distance: distance between the current pick-up and next drop-off locations;
	trip_time: time difference between the current pick-up time and next drop-off time;
	trip_fare: the cost of the current taxi ride in Singapore dollars;
	driverid: the id of the taxi driver;
	taxi_num_id: the id of the taxi;
	previous_dropoff_district: the district code for previous drop off location;
	start_district: the district code for current pick up location;
	end_district: the district code for next drop off location;
	year: the year of the trip;
	month: the month of the trip;
	day: the day-of-month of the trip;
	ym: the year-month of the trip;
	date: the date of the trip;
	hour: the hour-of-day of the trip;
	terminal: the pick-up terminal of the trip;
	gap_arrival: the absolute difference between the hourly count of flight arrivals and the hourly taxi pick-ups;
	gap_departure: the absolute difference between the hourly count of flight departures and the hourly taxi drop-offs;
	gap_arrival_lag: the absolute difference between the lagged hourly count of flight arrivals and the hourly taxi pick-ups;
	gap_departure_lag: the absolute difference between the hourly count of flight departures and the lagged hourly taxi drop-offs;
		
*/




***************************************
******	Table A1: Summary Statistics for the Full Sample
*************************************** 

* Detailed summary statistics (N, mean, sd) of the trip-level variables,
* written to table_A1.doc separately for every treat x after cell.
use ReSTATs_replication_data_full, clear

local a1_vars treat after deadheading_speed deadheading_time trip_speed trip_distance trip_time trip_fare

preserve
    keep `a1_vars'
    * Rescale the fare by 1/100 (presumably cents -> Singapore dollars; confirm against the data).
    replace trip_fare = trip_fare/100
    * Fix the column order of the output table.
    order `a1_vars'
    bysort treat after: outreg2 using table_A1.doc, replace dec(2) sum(detail) eqkeep(N mean sd)
restore

***************************************
******	Table A2: Comparisons of Treatment Group and Different Control Groups
*************************************** 

* Means of the trip-level variables by treat x after cell, first for the whole
* subsample and then for the treatment group paired with each alternative
* control group. All four sets of results go into table_A2.doc.
use ReSTATs_replication_data_sub, clear

local a2_vars treat after deadheading_speed deadheading_time trip_speed trip_distance trip_time trip_fare

* Whole subsample: this call starts table_A2.doc afresh (replace).
preserve
    keep `a2_vars'
    replace trip_fare = trip_fare/100
    order `a2_vars'
    bysort treat after: outreg2 using table_A2.doc, replace dec(2) sum(detail) eqkeep(mean)
restore

* Treatment group vs. control1, control2, control3 in turn: appended to the same file.
forvalues g = 1/3 {
    preserve
        * The sample restriction must come before the variable list is trimmed,
        * because control`g' is not part of the kept variables.
        keep if treat==1 | control`g'==1
        keep `a2_vars'
        replace trip_fare = trip_fare/100
        order `a2_vars'
        bysort treat after: outreg2 using table_A2.doc, append dec(2) sum(detail) eqkeep(mean)
    restore
}


***************************************
******	Table A3: Estimation Results on Deadheading Behavior: Three Alternative Control Groups
*************************************** 

use ReSTATs_replication_data_sub, clear

* Fixed effects absorbed in every specification of this table.
local a3_fe driverid date hour previous_dropoff_district start_district end_district

* One panel per alternative control group, two outcomes per panel:
*   Panel A (control1): Alternative Control Group 1: 171 Shopping Malls
*   Panel B (control2): Alternative Control Group 2: Sentosa Island+Zoo Area
*   Panel C (control3): Alternative Control Group 3: Districts 16 and 18
* Standard errors are clustered by taxi.
forvalues g = 1/3 {
    foreach outcome in ln_deadheading_speed ln_deadheading_time {
        reghdfe `outcome' treat_after if treat==1 | control`g'==1, abs(`a3_fe') vce(cluster taxi_num_id)
    }
}


***************************************
******	Table A4: Estimation Results on Deadheading Behavior: Districts 17, 22, and 24
*************************************** 

use ReSTATs_replication_data_sub, clear

* Same specification as the deadheading regressions elsewhere in this file,
* restricted to pickups in start districts 17, 22 and 24.
local a4_fe driverid date hour previous_dropoff_district start_district end_district

foreach outcome in ln_deadheading_speed ln_deadheading_time {
    reghdfe `outcome' treat_after if inlist(start_district, 17, 22, 24), abs(`a4_fe') vce(cluster taxi_num_id)
}

***************************************
******	Table A5: Estimation Results on the Number of Pickups at the Airport
*************************************** 

* Taxi-volume datasets, in the order the regressions are reported.
* (The _T1/_T2/_T3/_B files are presumably per-terminal samples -- confirm.)
local a5_data                                   ///
    ReSTATs_replication_data_taxi_volume        ///
    ReSTATs_replication_data_taxi_volume_T1     ///
    ReSTATs_replication_data_taxi_volume_T2     ///
    ReSTATs_replication_data_taxi_volume_T3     ///
    ReSTATs_replication_data_taxi_volume_B

* First pass: all observations of each dataset.
foreach dta of local a5_data {
    use `dta'.dta, clear
    reghdfe ln_taxi_volume treat_after, abs(start_district date) vce(cluster start_district)
}

* Second pass: same regressions without ym==599
* (599 is 2009m12 if ym is a Stata monthly date -- confirm).
foreach dta of local a5_data {
    use `dta'.dta, clear
    reghdfe ln_taxi_volume treat_after if ym!=599, abs(start_district date) vce(cluster start_district)
}

***************************************
******	Table A6: Estimation Results on Deadheading Behavior: Sample Bias
*************************************** 

use ReSTATs_replication_data_sub, clear

* Deadheading regressions on trips whose previous drop-off was not in district 17.
local a6_fe driverid date hour previous_dropoff_district start_district end_district

foreach outcome in ln_deadheading_speed ln_deadheading_time {
    reghdfe `outcome' treat_after if previous_dropoff_district!=17, abs(`a6_fe') vce(cluster taxi_num_id)
}

***************************************
******	Table A7: Decision-making Strategy of Taxi Drivers
*************************************** 

use ReSTATs_replication_data_sub, clear

* Same fixed effects and taxi-level clustering for each trip outcome, in table order:
* log fare, log distance, log time, log speed.
local a7_fe driverid date hour previous_dropoff_district start_district end_district

foreach outcome in ln_trip_fare ln_trip_distance ln_trip_time ln_trip_speed {
    reghdfe `outcome' treat_after, abs(`a7_fe') vce(cluster taxi_num_id)
}

***************************************
******	Table A8: Unobserved Shocks at the Driver Level and District Level
*************************************** 

use ReSTATs_replication_data_sub, clear

* Interacted fixed-effect identifiers: driver x year-month and start district x hour.
egen driverid_ym = group(driverid ym)
egen start_district_hour = group(start_district hour)

local a8_outcomes ln_deadheading_speed ln_deadheading_time

******  Panel A: Driver-Level Shocks
* driverid_ym takes the place of the plain driver fixed effect.
foreach outcome of local a8_outcomes {
    reghdfe `outcome' treat_after, abs(driverid_ym date hour previous_dropoff_district start_district end_district) vce(cluster taxi_num_id)
}

******  Panel B: District-Level Shocks
* start_district_hour takes the place of the separate hour and start_district fixed effects.
foreach outcome of local a8_outcomes {
    reghdfe `outcome' treat_after, abs(driverid date previous_dropoff_district start_district_hour end_district) vce(cluster taxi_num_id)
}

***************************************
******	Table A9: Estimation Results on Deadheading Behavior: Falsification Tests
*************************************** 

local a9_outcomes ln_deadheading_speed ln_deadheading_time
local a9_fe driverid date hour previous_dropoff_district start_district end_district

******  Panel A: Fictitious Treatment Group

* Swap the real treated trips for the fictitious treatment group, then rebuild
* the interaction from the treat/after values of the combined data.
use ReSTATs_replication_data_sub, clear
keep if treat!=1
append using fictious_treatment_group.dta
drop treat_after
gen treat_after = treat*after

foreach outcome of local a9_outcomes {
    reghdfe `outcome' treat_after, abs(`a9_fe') vce(cluster taxi_num_id)
}

******  Panel B: Fictitious Treatment Date

* Discard observations with after==1 and define a placebo "after" indicator
* on what remains.
use ReSTATs_replication_data_sub, clear
keep if after!=1
drop after treat_after
* Placebo cutoff: mdy >= 18171 (01oct2009 if mdy is a Stata daily date -- confirm).
gen after = (mdy>=18171)
gen treat_after = treat*after

foreach outcome of local a9_outcomes {
    reghdfe `outcome' treat_after, abs(`a9_fe') vce(cluster taxi_num_id)
}


***************************************
******	Table A10: Analyses on Revenue and Operation time
*************************************** 

use ReSTATs_replication_data_fare_daily.dta, clear

* Taxi-day regressions with taxi and date fixed effects, clustered by taxi:
* daily fare first, then daily operation time.
foreach outcome in ln_taxi_daily_fare ln_taxi_daily_time {
    reghdfe `outcome' treat_taxi_date_after treat_taxi_date, abs(taxi_num_id date) cluster(taxi_num_id)
}


***************************************
******	Table A11: Effects of TMS Introduction on Taxi On-Calls
*************************************** 

use ReSTATs_replication_data_full, clear

* Number of trips in each start_district x trip_type x date cell.
bysort start_district trip_type date: gen total_call = _N

* Keep trip_type==1 only (presumably on-call trips, per the table title -- confirm)
* and collapse to one row per start district and date.
keep if trip_type==1
duplicates drop start_district date, force
gen ln_total_call = ln(total_call)

* All districts first, then districts 16, 17 and 18 only.
local a11_opts abs(start_district date) vce(cluster start_district)
reghdfe ln_total_call treat_after, `a11_opts'
reghdfe ln_total_call treat_after if inlist(start_district, 16, 17, 18), `a11_opts'


***************************************
******	Figure A6: Dynamic Changes in Driver Concentration Across Pickup Terminals and Pickup Hours
*************************************** 

** Generate the variable HHI_terminal

use ReSTATs_replication_data_sub, clear
keep if treat==1

* Pickups per taxi-month, and per taxi-month-terminal (start_loc).
* NOTE(review): grouping uses year_month while the dummies below come from ym -- confirm both exist.
bysort taxi_num_id year_month: gen month_airport_visit = _N
bysort taxi_num_id start_loc year_month: gen month_terminal_visit = _N
duplicates drop taxi_num_id start_loc year_month, force

* HHI = sum of squared terminal shares within each taxi-month; one row per taxi-month afterwards.
gen market_share = month_terminal_visit/month_airport_visit
gen market_share2 = market_share*market_share
bysort taxi_num_id year_month: egen HHI = total(market_share2)
duplicates drop taxi_num_id year_month, force

gen after1 = (year==2010)
* One dummy per distinct ym value: ym_id1, ym_id2, ...
tab ym, gen(ym_id)

****** Panel A: HHI of Terminal

* Regress HHI on the ym_id7-ym_id24 dummies and store the estimates in 23.dta.
parmby "reghdfe HHI ym_id7-ym_id24, abs(taxi_num_id) vce(cluster taxi_num_id)", lab saving(23.dta, replace)

use 23.dta, clear

* Drop the 19th parameter (the row after the 18 dummies), then pull the
* dummy index out of the parameter name and discard non-dummy rows.
drop if parmseq==19
split parm, p("ym_id")
destring parm2, replace
drop if parm2==.

* Re-centre the index so that ym_id12 sits at 0 (7..24 becomes -5..12).
replace parm2 = parm2 - 12
sort parm2

* NOTE(review): tlength(mall) and size(med) are kept exactly as in the original;
* they look like truncated style names -- confirm before changing.
twoway scatter estimate parm2 , color(navy) tline(0,lcolor(gs10) lpattern(dash)) yline(0) ///
    xlabel(#18,labsize(small)) xtitle(Months Relative to Signboard Establishment ,size(small)) ///
    ytitle(Coefficient,size(small))  ylabel(-0.08(0.02)0.04, nogmax nogextend tlength(mall) labsize(small)) ///
    title(, color(black) size(med)) graphregion(fcolor(white)) ///
    ||  line estimate parm2 , color(navy) ///
    || (rcap min95 max95 parm2 , color(navy) lpattern(dash)), legend(off)
graph export "figure_A6_Panel_A.png", as(png) name("Graph") replace

** Generate the variable HHI_hour

use ReSTATs_replication_data_sub, clear
keep if treat==1

* Pickups per taxi-month, and per taxi-month-hour.
* NOTE(review): grouping uses year_month while the dummies below come from ym -- confirm both exist.
bysort taxi_num_id year_month: gen month_airport_visit = _N
bysort taxi_num_id hour year_month: gen month_hour_visit = _N
duplicates drop taxi_num_id hour year_month, force

* HHI = sum of squared hour-of-day shares within each taxi-month; one row per taxi-month afterwards.
gen market_share = month_hour_visit/month_airport_visit
gen market_share2 = market_share*market_share
bysort taxi_num_id year_month: egen HHI = total(market_share2)
duplicates drop taxi_num_id year_month, force

gen after1 = (year==2010)
* One dummy per distinct ym value: ym_id1, ym_id2, ...
tab ym, gen(ym_id)

****** Panel B: HHI of Hour

* Regress HHI on the ym_id7-ym_id24 dummies and store the estimates in 24.dta.
parmby "reghdfe HHI ym_id7-ym_id24, abs(taxi_num_id) vce(cluster taxi_num_id)", lab saving(24.dta, replace)

use 24.dta, clear

* Drop the 19th parameter (the row after the 18 dummies), then pull the
* dummy index out of the parameter name and discard non-dummy rows.
drop if parmseq==19
split parm, p("ym_id")
destring parm2, replace
drop if parm2==.

* Re-centre the index so that ym_id12 sits at 0 (7..24 becomes -5..12).
replace parm2 = parm2 - 12
sort parm2

* NOTE(review): tlength(mall) and size(med) are kept exactly as in the original;
* they look like truncated style names -- confirm before changing.
twoway scatter estimate parm2 , color(navy) tline(0,lcolor(gs10) lpattern(dash)) yline(0) ///
    xlabel(#18,labsize(small)) xtitle(Months Relative to Signboard Establishment ,size(small)) ///
    ytitle(Coefficient,size(small))  ylabel(-0.08(0.02)0.04, nogmax nogextend tlength(mall) labsize(small)) ///
    title(, color(black) size(med)) graphregion(fcolor(white)) ///
    ||  line estimate parm2 , color(navy) ///
    || (rcap min95 max95 parm2 , color(navy) lpattern(dash)), legend(off)
graph export "figure_A6_Panel_B.png", as(png) name("Graph") replace

***************************************
******	Figure A7: Dynamic Change in Inclination and Daily Visits
*************************************** 

******  Panel A: Dynamic Change in Inclination

use ReSTATs_replication_data_full,clear

* Regress the airport-pickup indicator (treat) on the ym_id7-ym_id24 dummies and
* store the estimates in 25.dta. The dummies are not created in this section, so
* they must already be in the dataset (Figure A6 builds them with -tab ym, gen(ym_id)-).
* The absorbed drop-off district is spelled out in full (end_district) so the
* command does not depend on variable-name abbreviation being resolved.
parmby "reghdfe treat ym_id7-ym_id24,  abs(driverid day hour previous_dropoff_district start_district end_district) vce(cluster taxi_num_id)", lab saving(25.dta, replace)

use 25.dta,clear

* Drop the 19th parameter (the row after the 18 dummies), then pull the
* dummy index out of the parameter name and discard non-dummy rows.
drop if parmseq==19
split parm,p("ym_id")
destring parm2,replace
drop if parm2==.

* Re-centre the index so that ym_id12 sits at 0 (7..24 becomes -5..12).
replace parm2=0 if parm2==12
replace parm2=parm2-12 if parm2<=11 &  parm2>0
replace parm2=parm2-12 if parm2>=13
sort parm2

* The x-axis title uses ,size(small) like the other three event-study panels
* (the original had a stray "ccc" inside the title text).
* NOTE(review): tlength(mall) and size(med) are kept as in the original; they look
* like truncated style names -- confirm before changing.
twoway scatter estimate parm2 , color(navy) tline(0,lcolor(gs10) lpattern(dash)) yline(0) xlabel(#18,labsize(small)) xtitle(Months Relative to Signboard Establishment ,size(small)) ytitle(Coefficient,size(small))  ylabel(-0.02(0.01)0.06, nogmax nogextend tlength(mall) labsize(small)) title(, color(black) size(med)) graphregion(fcolor(white)) ||  line estimate parm2 , color(navy)|| (rcap min95 max95 parm2 , color(navy) lpattern(dash)), legend(off)
graph export "figure_A7_Panel_A.png", as(png) name("Graph") replace

******  Panel B: Driver's Daily Frequency of Visiting the Airport

use ReSTATs_replication_data_daily_unbalanced.dta, clear

* Regress log daily airport visits on the ym_id7-ym_id24 dummies and store the
* estimates in 26.dta. The dummies are not created here, so the dataset must
* already contain them.
parmby "reghdfe ln_airport_daily_visit ym_id7-ym_id24 , abs(driverid day) vce(cluster taxi_num_id)", lab saving(26.dta, replace)

use 26.dta, clear

* Drop the 19th parameter (the row after the 18 dummies), then pull the
* dummy index out of the parameter name and discard non-dummy rows.
drop if parmseq==19
split parm, p("ym_id")
destring parm2, replace
drop if parm2==.

* Re-centre the index so that ym_id12 sits at 0 (7..24 becomes -5..12).
replace parm2 = parm2 - 12
sort parm2

* NOTE(review): tlength(mall) and size(med) are kept exactly as in the original;
* they look like truncated style names -- confirm before changing.
twoway scatter estimate parm2 , color(navy) tline(0,lcolor(gs10) lpattern(dash)) yline(0) ///
    xlabel(#18,labsize(small)) xtitle(Months Relative to Signboard Establishment ,size(small)) ///
    ytitle(Coefficient,size(small))  ylabel(-0.02(0.02)0.1, nogmax nogextend tlength(mall) labsize(small)) ///
    title(, color(black) size(med)) graphregion(fcolor(white)) ///
    ||  line estimate parm2 , color(navy) ///
    || (rcap min95 max95 parm2 , color(navy) lpattern(dash)), legend(off)
graph export "figure_A7_Panel_B.png", as(png) name("Graph") replace

